#!/usr/bin/env python3

"""
    thermalctld
    Thermal control daemon for SONiC
"""

from enum import Enum, auto
import argparse
import signal
import sys
import threading
import time
from datetime import datetime

import sonic_platform
from sonic_py_common import daemon_base, logger
from sonic_py_common.task_base import ThreadTaskBase
from swsscommon import swsscommon


# TODO: This lookup table is only needed while Python 2 is supported. Starting
# with Python 3.5 the name can be read directly from the signal object
# (e.g., `signal.SIGINT.name`).
SIGNALS_TO_NAMES_DICT = {
    getattr(signal, sig_name): sig_name
    for sig_name in dir(signal)
    if sig_name.startswith('SIG') and '_' not in sig_name
}

SYSLOG_IDENTIFIER = 'thermalctld'
NOT_AVAILABLE = 'N/A'
CHASSIS_INFO_KEY = 'chassis 1'
PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO'
INVALID_SLOT_OR_DPU = -1

ERR_UNKNOWN = 1
CHASSIS_GET_ERROR = 2

# The thermal control daemon is designed to never exit. Whenever it does exit,
# it must return a non-zero exit code so that supervisord restarts it
# automatically.
exit_code = ERR_UNKNOWN

# utility functions

def try_get(callback, default=NOT_AVAILABLE):
    """
    Call a platform API and substitute a fallback when it yields nothing useful
    :param callback: Zero-argument callable to invoke
    :param default: Value to hand back when the callable raises NotImplementedError
                    or returns None
    :return: The callable's result, or ``default`` in the two cases above. Any other
             exception raised by the callable is not caught here.
    """
    try:
        result = callback()
    except NotImplementedError:
        return default

    return default if result is None else result


def update_entity_info(table, parent_name, key, device, device_index):
    """
    Write the parent/position relationship of a device into the given table
    :param table: Table object the entry is written to (via ``table.set``)
    :param parent_name: Name of the parent device, stored as 'parent_name'
    :param key: Table key under which the entry is stored
    :param device: Device object; its get_position_in_parent() is queried
    :param device_index: Fallback position used when get_position_in_parent()
                         is not implemented or returns None
    :return:
    """
    position = try_get(device.get_position_in_parent, device_index)
    entity_fields = [
        ('position_in_parent', str(position)),
        ('parent_name', parent_name),
    ]
    table.set(key, swsscommon.FieldValuePairs(entity_fields))


class FanType(Enum):
    """Kind of parent device a fan belongs to."""
    # Values are spelled out explicitly; they are the same ones auto() assigns.
    DRAWER = 1
    PSU = 2
    MODULE = 3

class FanStatus(logger.Logger):
    """
    Cache of the last observed state of one fan.

    Each setter stores the new value and reports whether it differs from the
    cached one. The class additionally keeps two counters shared by all
    instances: the number of absent drawer fans and the number of faulty fans
    seen since the last reset_fan_counter() call.
    """
    # Shared by every instance; bumped by set_presence()/set_fault_status().
    absent_fan_count = 0
    faulty_fan_count = 0

    def __init__(self, fan=None, fan_type=FanType.DRAWER):
        """
        Initializer of FanStatus
        :param fan: Platform fan object this status belongs to
        :param fan_type: FanType of the parent device holding the fan
        """
        super().__init__(SYSLOG_IDENTIFIER)

        self.fan = fan
        self.fan_type = fan_type
        # Start from the "everything is fine" state
        self.presence = self.status = True
        self.under_speed = self.over_speed = False
        self.invalid_direction = False
        self.led_initialized = False

    @classmethod
    def get_bad_fan_count(cls):
        """
        :return: Sum of the absent and faulty fan counters
        """
        return sum((cls.absent_fan_count, cls.faulty_fan_count))

    @classmethod
    def reset_fan_counter(cls):
        """
        Zero both shared counters
        :return:
        """
        cls.absent_fan_count = cls.faulty_fan_count = 0

    def set_presence(self, presence):
        """
        Store the Fan presence and count absent drawer fans
        :param presence: Fan presence status
        :return: True if the cached presence changed else False
        """
        # Only fans that sit in a fan drawer contribute to the absent counter
        if self.fan_type == FanType.DRAWER and not presence:
            FanStatus.absent_fan_count += 1

        changed = not (presence == self.presence)
        if changed:
            self.presence = presence
        return changed

    def set_fault_status(self, status):
        """
        Store the Fan fault status and count faulty fans
        :param status: Fan fault status, a false value indicates a fault
        :return: True if the cached status changed else False
        """
        if not status:
            FanStatus.faulty_fan_count += 1

        changed = not (status == self.status)
        if changed:
            self.status = status
        return changed

    def set_under_speed(self, is_under_speed):
        """
        Store the Fan under speed status
        :param is_under_speed: Fan under speed threshold status, or NOT_AVAILABLE
        :return: True if the cached status changed else False
        """
        previous = self.under_speed
        if is_under_speed == NOT_AVAILABLE:
            # An unavailable check is treated as "not under speed"
            if previous:
                self.log_warning('Fan under speed threshold check became unavailable')
            is_under_speed = False

        self.under_speed = is_under_speed
        return previous != is_under_speed

    def set_over_speed(self, is_over_speed):
        """
        Store the Fan over speed status
        :param is_over_speed: Fan over speed threshold status, or NOT_AVAILABLE
        :return: True if the cached status changed else False
        """
        previous = self.over_speed
        if is_over_speed == NOT_AVAILABLE:
            # An unavailable check is treated as "not over speed"
            if previous:
                self.log_warning('Fan over speed threshold check became unavailable')
            is_over_speed = False

        self.over_speed = is_over_speed
        return previous != is_over_speed

    def is_ok(self):
        """
        Tell whether the Fan works as expected
        :return: A true value if the Fan is present, not faulty, within its speed
                 range and has a valid direction; otherwise a false value
        """
        # The failing flag itself is returned for presence/status so that the
        # result is exactly what a short-circuit "and" chain would produce.
        if not self.presence:
            return self.presence
        if not self.status:
            return self.status
        if self.under_speed:
            return False
        if self.over_speed:
            return False
        return not self.invalid_direction


#
# FanUpdater ===================================================================
#
class FanUpdater(logger.Logger):
    """
    Collects the state of the fans and fan drawers reported by the chassis and
    writes it to the FAN_INFO, FAN_DRAWER_INFO and PHYSICAL_ENTITY_INFO tables.
    """
    # Fan information table name in database
    FAN_INFO_TABLE_NAME = 'FAN_INFO'
    FAN_DRAWER_INFO_TABLE_NAME = 'FAN_DRAWER_INFO'

    def __init__(self, chassis, task_stopping_event):
        """
        Initializer for FanUpdater
        :param chassis: Object representing a platform chassis
        :param task_stopping_event: Event object; once it is set, an ongoing
                                    update() returns as soon as it notices
        """
        super(FanUpdater, self).__init__(SYSLOG_IDENTIFIER)

        self.chassis = chassis
        self.task_stopping_event = task_stopping_event
        # fan name -> FanStatus, kept between update() calls to detect changes
        self.fan_status_dict = {}
        state_db = daemon_base.db_connect("STATE_DB")
        self.table = swsscommon.Table(state_db, FanUpdater.FAN_INFO_TABLE_NAME)
        self.drawer_table = swsscommon.Table(state_db, FanUpdater.FAN_DRAWER_INFO_TABLE_NAME)
        self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE)

    def __del__(self):
        """
        Delete every key from the three tables this updater writes to
        """
        # __del__ also runs for an object whose __init__ raised before the
        # tables were created, so look the attributes up defensively.
        for table_attr in ('table', 'drawer_table', 'phy_entity_table'):
            table = getattr(self, table_attr, None)
            if table:
                for key in table.getKeys():
                    table._del(key)

    def _log_on_status_changed(self, normal_status, normal_log, abnormal_log):
        """
        Log when any status changed
        :param normal_status: Expected status.
        :param normal_log: Log string for expected status.
        :param abnormal_log: Log string for unexpected status
        :return:
        """
        if normal_status:
            self.log_notice(normal_log)
        else:
            self.log_warning(abnormal_log)

    def update(self):
        """
        Update all Fan information to database.
        Returns early, without the LED update and the bad fan summary, once the
        stopping event is set.
        :return:
        """
        self.log_debug("Start fan updating")
        old_bad_fan_count = FanStatus.get_bad_fan_count()
        # The shared counters are rebuilt from scratch by the refresh calls below
        FanStatus.reset_fan_counter()

        for drawer_index, drawer in enumerate(self.chassis.get_all_fan_drawers()):
            if self.task_stopping_event.is_set():
                return
            self._refresh_fan_drawer_status(drawer, drawer_index)
            if not self._refresh_child_fans(drawer, drawer_index, FanType.DRAWER, 'fan'):
                return

        for module_index, module in enumerate(self.chassis.get_all_modules()):
            if not self._refresh_child_fans(module, module_index, FanType.MODULE, 'module fan'):
                return

        for psu_index, psu in enumerate(self.chassis.get_all_psus()):
            if not self._refresh_child_fans(psu, psu_index, FanType.PSU, 'PSU fan'):
                return

        self._update_led_color()

        bad_fan_count = FanStatus.get_bad_fan_count()
        if bad_fan_count > 0 and old_bad_fan_count != bad_fan_count:
            self.log_warning("Insufficient number of working fans warning: {} fan{} not working".format(
                bad_fan_count, " is" if bad_fan_count == 1 else "s are"
            ))
        elif old_bad_fan_count > 0 and bad_fan_count == 0:
            self.log_notice("Insufficient number of working fans warning cleared: all fans are back to normal")

        self.log_debug("End fan updating")

    def _refresh_child_fans(self, parent, parent_index, fan_type, fan_label):
        """
        Refresh every fan of one parent device; a failure on one fan is logged and skipped
        :param parent: Parent device; its get_all_fans() is iterated
        :param parent_index: Index of the parent device
        :param fan_type: FanType matching the parent device
        :param fan_label: Wording used for the fan in the failure log
        :return: False if the stopping event was set while iterating else True
        """
        for fan_index, fan in enumerate(parent.get_all_fans()):
            if self.task_stopping_event.is_set():
                return False
            try:
                self._refresh_fan_status(parent, parent_index, fan, fan_index, fan_type)
            except Exception as e:
                self.log_warning('Failed to update {} status - {}'.format(fan_label, repr(e)))
        return True

    def _refresh_fan_drawer_status(self, fan_drawer, drawer_index):
        """
        Get fan drawer status by platform API and write to database
        :param fan_drawer: Object representing a platform fan drawer
        :param drawer_index: Index of the fan drawer, used as fallback position in parent
        :return:
        """
        drawer_name = try_get(fan_drawer.get_name)
        # Without a name there is no key to store the drawer under
        if drawer_name == NOT_AVAILABLE:
            return

        update_entity_info(self.phy_entity_table, CHASSIS_INFO_KEY, drawer_name, fan_drawer, drawer_index)

        fvs = swsscommon.FieldValuePairs(
            [('presence', str(try_get(fan_drawer.get_presence, False))),
             ('model', str(try_get(fan_drawer.get_model))),
             ('serial', str(try_get(fan_drawer.get_serial))),
             ('status', str(try_get(fan_drawer.get_status))),
             ('is_replaceable', str(try_get(fan_drawer.is_replaceable, False))),
             ])

        self.drawer_table.set(drawer_name, fvs)

    def _refresh_fan_status(self, parent, parent_index, fan, fan_index, fan_type=FanType.DRAWER):
        """
        Get Fan status by platform API and write to database for a given Fan
        :param parent: Parent device of this fan
        :param parent_index: Parent device index
        :param fan: Object representing a platform Fan
        :param fan_index: Index of the Fan object in its parent device
        :param fan_type: FanType of the parent device; selects how the parent name
                         is derived and whether the fan LED is set here
        :return:
        """
        drawer_name = NOT_AVAILABLE if fan_type != FanType.DRAWER else str(try_get(parent.get_name))
        if fan_type == FanType.PSU:
            parent_name = try_get(parent.get_name, default='PSU {}'.format(parent_index + 1))
        elif fan_type == FanType.MODULE:
            parent_name = try_get(parent.get_name, default='Module {}'.format(parent_index + 1))
        else:
            parent_name = drawer_name if drawer_name != NOT_AVAILABLE else CHASSIS_INFO_KEY
        fan_name = try_get(fan.get_name, '{} fan {}'.format(parent_name, fan_index + 1))
        update_entity_info(self.phy_entity_table, parent_name, fan_name, fan, fan_index + 1)
        if fan_name not in self.fan_status_dict:
            self.fan_status_dict[fan_name] = FanStatus(fan, fan_type)

        fan_status = self.fan_status_dict[fan_name]

        speed = NOT_AVAILABLE
        speed_target = NOT_AVAILABLE
        is_under_speed = NOT_AVAILABLE
        is_over_speed = NOT_AVAILABLE
        fan_fault_status = NOT_AVAILABLE
        fan_direction = NOT_AVAILABLE
        is_replaceable = try_get(fan.is_replaceable, False)
        presence = try_get(fan.get_presence, False)
        # The remaining attributes are only queried for a present fan
        if presence:
            speed = try_get(fan.get_speed)
            speed_target = try_get(fan.get_target_speed)
            is_under_speed = try_get(fan.is_under_speed)
            is_over_speed = try_get(fan.is_over_speed)
            fan_fault_status = try_get(fan.get_status, False)
            fan_direction = try_get(fan.get_direction)

        # The LED is (re)written on the first pass and on every status change
        set_led = not fan_status.led_initialized
        if fan_status.set_presence(presence):
            set_led = True
            self._log_on_status_changed(fan_status.presence,
                                        'Fan removed warning cleared: {} was inserted'.format(fan_name),
                                        'Fan removed warning: {} was removed from '
                                        'the system, potential overheat hazard'.format(fan_name)
                                        )

        if presence and fan_status.set_fault_status(fan_fault_status):
            set_led = True
            self._log_on_status_changed(fan_status.status,
                                        'Fan fault warning cleared: {} is back to normal'.format(fan_name),
                                        'Fan fault warning: {} is broken'.format(fan_name)
                                        )

        if presence and fan_status.set_under_speed(is_under_speed):
            set_led = True
            self._log_on_status_changed(not fan_status.under_speed,
                                        'Fan low speed warning cleared: {} speed is back to normal'.format(fan_name),
                                        'Fan low speed warning: {} current speed={}, target speed={}'.
                                        format(fan_name, speed, speed_target)
                                        )

        if presence and fan_status.set_over_speed(is_over_speed):
            set_led = True
            self._log_on_status_changed(not fan_status.over_speed,
                                        'Fan high speed warning cleared: {} speed is back to normal'.format(fan_name),
                                        'Fan high speed warning: {} current speed={}, target speed={}'.
                                        format(fan_name, speed, speed_target)
                                        )

        # We don't set PSU led here, PSU led will be handled in psud
        if set_led:
            if fan_type == FanType.DRAWER:
                self._set_fan_led(parent, fan, fan_name, fan_status)

        # For a present fan, publish the aggregated health instead of the raw fault flag
        if fan_fault_status != NOT_AVAILABLE:
            fan_fault_status = fan_status.is_ok()

        fvs = swsscommon.FieldValuePairs(
            [('presence', str(presence)),
             ('drawer_name', drawer_name),
             ('model', str(try_get(fan.get_model))),
             ('serial', str(try_get(fan.get_serial))),
             ('status', str(fan_fault_status)),
             ('direction', str(fan_direction)),
             ('speed', str(speed)),
             ('speed_target', str(speed_target)),
             ('is_under_speed', str(is_under_speed)),
             ('is_over_speed', str(is_over_speed)),
             ('is_replaceable', str(is_replaceable)),
             ('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S'))
             ])

        self.table.set(fan_name, fvs)

    def _set_fan_led(self, fan_drawer, fan, fan_name, fan_status):
        """
        Set fan led according to current status
        :param fan_drawer: Object representing a platform Fan drawer or PSU
        :param fan: Object representing a platform Fan
        :param fan_name: Name of the Fan object in case any vendor not implement Fan.get_name
        :param fan_status: Object representing the FanStatus
        :return:
        """
        try:
            led_color = fan.STATUS_LED_COLOR_GREEN if fan_status.is_ok() else fan.STATUS_LED_COLOR_RED
            fan.set_status_led(led_color)
            fan_drawer.set_status_led(led_color)
        except NotImplementedError:
            self.log_warning('Failed to set status LED for fan {}, set_status_led not implemented'.format(fan_name))

        # Set led_initialized to True even if there is NotImplementedError as it is not necessary to
        # print the warning log again and again. But if there is other exception, we could not
        # reach this line, and it will retry setting led color in the next run.
        fan_status.led_initialized = True

    def _update_led_color(self):
        """
        Read back the status LED state of every known fan and of every named fan
        drawer and write it to the 'led_status' field of the matching table entry
        :return:
        """
        for fan_name, fan_status in self.fan_status_dict.items():
            if self.task_stopping_event.is_set():
                return
            try:
                fvs = swsscommon.FieldValuePairs([
                    ('led_status', str(try_get(fan_status.fan.get_status_led)))
                ])
            except Exception as e:
                self.log_warning('Failed to get status LED state for fan {} - {}'.format(fan_name, e))
                fvs = swsscommon.FieldValuePairs([
                    ('led_status', NOT_AVAILABLE)
                ])
            self.table.set(fan_name, fvs)

        for drawer in self.chassis.get_all_fan_drawers():
            if self.task_stopping_event.is_set():
                return
            drawer_name = try_get(drawer.get_name)
            if drawer_name == NOT_AVAILABLE:
                continue
            try:
                fvs = swsscommon.FieldValuePairs([
                    ('led_status', str(try_get(drawer.get_status_led)))
                ])
            except Exception as e:
                # Name the drawer and the cause, like the per-fan warning above
                self.log_warning('Failed to get status LED state for fan drawer {} - {}'.format(drawer_name, e))
                fvs = swsscommon.FieldValuePairs([
                    ('led_status', NOT_AVAILABLE)
                ])
            self.drawer_table.set(drawer_name, fvs)


class TemperatureStatus(logger.Logger):
    """
    Cache of the last temperature reading of one thermal together with its
    over/under temperature flags.
    """
    # A jump larger than this between two consecutive readings is logged
    TEMPERATURE_DIFF_THRESHOLD = 10

    def __init__(self):
        """
        Initializer of TemperatureStatus
        """
        super().__init__(SYSLOG_IDENTIFIER)

        self.temperature = None
        self.over_temperature = self.under_temperature = False

    def set_temperature(self, name, temperature):
        """
        Store a new temperature reading and warn when it jumped too far from the
        previous one or when it stopped being available.
        :param name: Name of the thermal.
        :param temperature: New temperature value, or NOT_AVAILABLE.
        :return:
        """
        previous = self.temperature

        if temperature == NOT_AVAILABLE:
            # Warn once, on the transition from a known value to unavailable
            if previous is not None:
                self.log_warning('Temperature of {} became unavailable'.format(name))
                self.temperature = None
            return

        if previous is not None and \
                abs(temperature - previous) > TemperatureStatus.TEMPERATURE_DIFF_THRESHOLD:
            self.log_warning(
                'Temperature of {} changed too fast, from {} to {}, please check your hardware'.format(
                    name, previous, temperature))

        self.temperature = temperature

    def _check_temperature_value_available(self, temperature, threshold, current_status):
        """
        Tell whether a temperature/threshold pair can be compared
        :param temperature: Temperature, or NOT_AVAILABLE
        :param threshold: Threshold, or NOT_AVAILABLE
        :param current_status: Flag currently cached for this comparison; a warning
                               is logged when it is True and a value is unavailable
        :return: True if both values are available else False
        """
        if temperature != NOT_AVAILABLE and threshold != NOT_AVAILABLE:
            return True

        if current_status is True:
            self.log_warning('Thermal temperature or threshold became unavailable, '
                             'temperature={}, threshold={}'.format(temperature, threshold))
        return False

    def set_over_temperature(self, temperature, threshold):
        """
        Set over temperature status
        :param temperature: Temperature
        :param threshold: High threshold
        :return: True if over temperature status changed else False
        """
        previous = self.over_temperature

        if not self._check_temperature_value_available(temperature, threshold, previous):
            # Values that cannot be compared clear the flag
            self.over_temperature = False
            return previous != self.over_temperature

        is_over = temperature > threshold
        changed = not (is_over == previous)
        if changed:
            self.over_temperature = is_over
        return changed

    def set_under_temperature(self, temperature, threshold):
        """
        Set under temperature status
        :param temperature: Temperature
        :param threshold: Low threshold
        :return: True if under temperature status changed else False
        """
        previous = self.under_temperature

        if not self._check_temperature_value_available(temperature, threshold, previous):
            # Values that cannot be compared clear the flag
            self.under_temperature = False
            return previous != self.under_temperature

        is_under = temperature < threshold
        changed = not (is_under == previous)
        if changed:
            self.under_temperature = is_under
        return changed


#
# TemperatureUpdater  ======================================================================
#
class TemperatureUpdater(logger.Logger):
    """
    Collects the readings of the thermals reported by the chassis, its PSUs,
    SFPs and (on a modular chassis) its modules, and writes them to the
    TEMPERATURE_INFO table. On a modular chassis or a SmartSwitch DPU the same
    entries are mirrored to a per-slot table in CHASSIS_STATE_DB when available.
    """
    # Temperature information table name in database
    TEMPER_INFO_TABLE_NAME = 'TEMPERATURE_INFO'

    def __init__(self, chassis, task_stopping_event):
        """
        Initializer of TemperatureUpdater
        :param chassis: Object representing a platform chassis
        :param task_stopping_event: Event object; once it is set, an ongoing
                                    update() returns as soon as it notices
        """
        super(TemperatureUpdater, self).__init__(SYSLOG_IDENTIFIER)

        self.chassis = chassis
        self.task_stopping_event = task_stopping_event
        # thermal name -> TemperatureStatus, kept between update() calls
        self.temperature_status_dict = {}
        state_db = daemon_base.db_connect("STATE_DB")
        self.table = swsscommon.Table(state_db, TemperatureUpdater.TEMPER_INFO_TABLE_NAME)
        self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE)
        self.chassis_table = None
        # (thermal, parent_name, index) tuples seen by the previous update() pass
        self.all_thermals = set()

        self.is_chassis_system = chassis.is_modular_chassis()
        self.is_smartswitch_dpu = chassis.is_smartswitch() and chassis.is_dpu()
        self.is_chassis_upd_required = self.is_chassis_system or self.is_smartswitch_dpu
        if self.is_chassis_upd_required:
            my_slot = try_get(chassis.get_my_slot if self.is_chassis_system else chassis.get_dpu_id, INVALID_SLOT_OR_DPU)
            if my_slot != INVALID_SLOT_OR_DPU:
                try:
                    # Modular chassis does not have to have table CHASSIS_STATE_DB.
                    # So catch the exception here and ignore it.
                    table_name = TemperatureUpdater.TEMPER_INFO_TABLE_NAME+'_'+str(my_slot)
                    chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB")
                    self.chassis_table = swsscommon.Table(chassis_state_db, table_name)
                except Exception:
                    self.chassis_table = None

    def __del__(self):
        """
        Delete every key of the tables this updater writes to
        """
        # __del__ also runs for an object whose __init__ raised part-way, so
        # none of the attributes can be assumed to exist.
        table = getattr(self, 'table', None)
        if table:
            table_keys = table.getKeys()
            for tk in table_keys:
                table._del(tk)
                try:
                    if getattr(self, 'is_chassis_upd_required', False) and \
                            getattr(self, 'chassis_table', None) is not None:
                        self.chassis_table._del(tk)
                except Exception:
                    # On a chassis system it is possible we may lose connection
                    # to the supervisor and chassisdb. If this happens then we
                    # should simply remove our handle to chassisdb.
                    self.chassis_table = None
        phy_entity_table = getattr(self, 'phy_entity_table', None)
        if phy_entity_table:
            phy_entity_keys = phy_entity_table.getKeys()
            for pek in phy_entity_keys:
                phy_entity_table._del(pek)

    def _log_on_status_changed(self, normal_status, normal_log, abnormal_log):
        """
        Log when any status changed
        :param normal_status: Expected status.
        :param normal_log: Log string for expected status.
        :param abnormal_log: Log string for unexpected status
        :return:
        """
        if normal_status:
            self.log_notice(normal_log)
        else:
            self.log_warning(abnormal_log)

    def update(self):
        """
        Update all temperature information to database and remove the entries of
        thermals that were seen in the previous pass but not in this one.
        Returns early, without the removal step, once the stopping event is set.
        :return:
        """
        self.log_debug("Start temperature updating")
        available_thermals = set()

        if not self._refresh_thermal_group(CHASSIS_INFO_KEY, self.chassis.get_all_thermals(), available_thermals):
            return

        for psu_index, psu in enumerate(self.chassis.get_all_psus()):
            parent_name = try_get(psu.get_name, default='PSU {}'.format(psu_index + 1))
            if psu.get_presence():
                if not self._refresh_thermal_group(parent_name, psu.get_all_thermals(), available_thermals):
                    return

        for sfp_index, sfp in enumerate(self.chassis.get_all_sfps()):
            parent_name = 'SFP {}'.format(sfp_index + 1)
            if not self._refresh_thermal_group(parent_name, sfp.get_all_thermals(), available_thermals):
                return

        # As there are no modules present in DPU, this IF condition is not updated to consider DPU chassis
        if self.is_chassis_system:
            for module_index, module in enumerate(self.chassis.get_all_modules()):
                module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1))

                if not self._refresh_thermal_group(module_name, module.get_all_thermals(), available_thermals):
                    return

                for sfp_index, sfp in enumerate(module.get_all_sfps()):
                    sfp_name = '{} SFP {}'.format(module_name, sfp_index + 1)
                    if not self._refresh_thermal_group(sfp_name, sfp.get_all_thermals(), available_thermals):
                        return

                for psu_index, psu in enumerate(module.get_all_psus()):
                    if psu.get_presence():
                        psu_name = try_get(psu.get_name, default='{} PSU {}'.format(module_name, psu_index + 1))
                        if not self._refresh_thermal_group(psu_name, psu.get_all_thermals(), available_thermals):
                            return

        thermals_to_remove = self.all_thermals - available_thermals
        self.all_thermals = available_thermals
        for thermal, parent_name, thermal_index in thermals_to_remove:
            self._remove_thermal_from_db(thermal, parent_name, thermal_index)

        self.log_debug("End temperature updating")

    def _refresh_thermal_group(self, parent_name, thermals, available_thermals):
        """
        Refresh every thermal of one parent device and record it as available
        :param parent_name: Name of the parent device of the thermals
        :param thermals: Iterable of thermal objects belonging to the parent
        :param available_thermals: Set that receives a (thermal, parent_name, index)
                                   tuple for each thermal visited
        :return: False if the stopping event was set while iterating else True
        """
        for thermal_index, thermal in enumerate(thermals):
            if self.task_stopping_event.is_set():
                return False

            available_thermals.add((thermal, parent_name, thermal_index))
            self._refresh_temperature_status(parent_name, thermal, thermal_index)
        return True

    def _refresh_temperature_status(self, parent_name, thermal, thermal_index):
        """
        Get temperature status by platform API and write to database.
        Any exception is logged as a warning and not propagated.
        :param parent_name: Name of parent device of the thermal object
        :param thermal: Object representing a platform thermal zone
        :param thermal_index: Index of the thermal object in platform chassis
        :return:
        """
        # Bind the fallback name before entering the try block: if get_name()
        # itself raises, the except handler still has a name to report instead
        # of failing with UnboundLocalError.
        name = '{} Thermal {}'.format(parent_name, thermal_index + 1)
        try:
            name = try_get(thermal.get_name, name)

            # Only save entity info for thermals that belong to chassis and PSU
            # for SFP thermal, they don't need save entity info because snmp can deduce the relation from TRANSCEIVER_DOM_SENSOR
            # and as we save logical port in TRANSCEIVER_INFO table, for split cable, a SFP thermal might have multiple parent
            # logical port
            if 'SFP' not in parent_name:
                update_entity_info(self.phy_entity_table, parent_name, name, thermal, thermal_index + 1)

            if name not in self.temperature_status_dict:
                self.temperature_status_dict[name] = TemperatureStatus()

            temperature_status = self.temperature_status_dict[name]

            high_threshold = NOT_AVAILABLE
            low_threshold = NOT_AVAILABLE
            high_critical_threshold = NOT_AVAILABLE
            low_critical_threshold = NOT_AVAILABLE
            maximum_temperature = NOT_AVAILABLE
            minimum_temperature = NOT_AVAILABLE
            temperature = try_get(thermal.get_temperature)
            is_replaceable = try_get(thermal.is_replaceable, False)
            # Thresholds and recorded extremes are only queried when a reading exists
            if temperature != NOT_AVAILABLE:
                temperature_status.set_temperature(name, temperature)
                minimum_temperature = try_get(thermal.get_minimum_recorded)
                maximum_temperature = try_get(thermal.get_maximum_recorded)
                high_threshold = try_get(thermal.get_high_threshold)
                low_threshold = try_get(thermal.get_low_threshold)
                high_critical_threshold = try_get(thermal.get_high_critical_threshold)
                low_critical_threshold = try_get(thermal.get_low_critical_threshold)

            warning = False
            if temperature != NOT_AVAILABLE and temperature_status.set_over_temperature(temperature, high_threshold):
                self._log_on_status_changed(not temperature_status.over_temperature,
                                            'High temperature warning cleared: {} temperature restored to {}C, high threshold {}C'.
                                            format(name, temperature, high_threshold),
                                            'High temperature warning: {} current temperature {}C, high threshold {}C'.
                                            format(name, temperature, high_threshold)
                                            )
            warning = warning | temperature_status.over_temperature

            if temperature != NOT_AVAILABLE and temperature_status.set_under_temperature(temperature, low_threshold):
                self._log_on_status_changed(not temperature_status.under_temperature,
                                            'Low temperature warning cleared: {} temperature restored to {}C, low threshold {}C'.
                                            format(name, temperature, low_threshold),
                                            'Low temperature warning: {} current temperature {}C, low threshold {}C'.
                                            format(name, temperature, low_threshold)
                                            )
            warning = warning | temperature_status.under_temperature

            fvs = swsscommon.FieldValuePairs(
                [('temperature', str(temperature)),
                ('minimum_temperature', str(minimum_temperature)),
                ('maximum_temperature', str(maximum_temperature)),
                ('high_threshold', str(high_threshold)),
                ('low_threshold', str(low_threshold)),
                ('warning_status', str(warning)),
                ('critical_high_threshold', str(high_critical_threshold)),
                ('critical_low_threshold', str(low_critical_threshold)),
                ('is_replaceable', str(is_replaceable)),
                ('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S'))
                ])

            self.table.set(name, fvs)
            if self.is_chassis_upd_required and self.chassis_table is not None:
                self.chassis_table.set(name, fvs)
        except Exception as e:
            self.log_warning('Failed to update thermal status for {} - {}'.format(name, repr(e)))

    def _remove_thermal_from_db(self, thermal, parent_name, thermal_index):
        """
        Delete the database entry of a thermal that is no longer reported
        :param thermal: Object representing a platform thermal zone
        :param parent_name: Name of parent device of the thermal object
        :param thermal_index: Index of the thermal object in its parent device
        :return:
        """
        name = try_get(thermal.get_name, '{} Thermal {}'.format(parent_name, thermal_index + 1))
        self.table._del(name)

        if self.chassis_table is not None:
            self.chassis_table._del(name)


class ThermalMonitor(ThreadTaskBase):
    """
    Periodic task which refreshes fan and temperature status by driving a
    FanUpdater and a TemperatureUpdater
    """

    def __init__(
        self, chassis, initial_interval, update_interval, update_elapsed_threshold
    ):
        """
        Initializer for ThermalMonitor
        :param chassis: Object representing a platform chassis
        :param initial_interval: Wait time in seconds before the first update round; also
                                 used as the wait time after a round which took at least
                                 update_interval seconds
        :param update_interval: Target period in seconds between update rounds
        :param update_elapsed_threshold: Duration in seconds of an update round above which
                                         a warning is logged
        """
        super().__init__()

        # Timing configuration (all values in seconds)
        self.update_elapsed_threshold = update_elapsed_threshold
        self.update_interval = update_interval
        self.initial_interval = initial_interval

        # The first pass of the loop waits for the initial interval
        self.wait_time = initial_interval

        # TODO: Refactor to eliminate the need for this Logger instance
        self.logger = logger.Logger(SYSLOG_IDENTIFIER)

        # Set minimum logging level to INFO
        self.logger.set_min_log_priority_info()

        # Both updaters receive the stopping event of this task
        # (NOTE(review): task_stopping_event is presumably created by ThreadTaskBase - confirm)
        stopping_event = self.task_stopping_event
        self.fan_updater = FanUpdater(chassis, stopping_event)
        self.temperature_updater = TemperatureUpdater(chassis, stopping_event)

    def main(self):
        """
        Run one update round for fans and temperatures, then compute how long
        the loop should wait before the next round
        :return:
        """
        started_at = time.time()
        self.fan_updater.update()
        self.temperature_updater.update()
        duration = time.time() - started_at

        # Deduct the time already spent from the regular interval; when the round
        # overran the interval, fall back to the initial interval instead
        self.wait_time = (
            self.update_interval - duration
            if duration < self.update_interval
            else self.initial_interval
        )

        if duration > self.update_elapsed_threshold:
            message = ('Update fan and temperature status took {} seconds, '
                       'there might be performance risk')
            self.logger.log_warning(message.format(duration))

    def task_worker(self):
        """
        Thread function which keeps updating fan and temperature status until
        the stopping event is set
        :return:
        """
        self.logger.log_info("Start thermal monitoring loop")

        # wait() returns True once the stopping event is set, which ends the loop;
        # otherwise the wait timed out and it is time for the next update round
        while True:
            if self.task_stopping_event.wait(self.wait_time):
                break
            self.main()

        self.logger.log_info("Stop thermal monitoring loop")


#
# Daemon =======================================================================
#
class ThermalControlDaemon(daemon_base.DaemonBase):
    """
    Daemon which starts the thermal monitor task and, when the chassis provides
    a thermal manager, periodically runs the thermal policy
    """

    # Default interval (seconds) between two thermal policy runs
    INTERVAL = 60
    # Duration (seconds) of a policy run above which a warning is logged
    RUN_POLICY_WARN_THRESHOLD_SECS = 30
    # Wait time (seconds) used after a policy run which took at least one interval
    FAST_START_INTERVAL = 15

    # Policy file handed to the thermal manager
    POLICY_FILE = '/usr/share/sonic/platform/thermal_policy.json'

    def __init__(
        self,
        thermal_monitor_initial_interval,
        thermal_monitor_update_interval,
        thermal_monitor_update_elapsed_threshold,
    ):
        """
        Initializer of ThermalControlDaemon
        :param thermal_monitor_initial_interval: Initial interval passed to ThermalMonitor
        :param thermal_monitor_update_interval: Update interval passed to ThermalMonitor
        :param thermal_monitor_update_elapsed_threshold: Elapsed-time warning threshold
                                                         passed to ThermalMonitor
        """
        super().__init__(SYSLOG_IDENTIFIER)

        # Set minimum logging level to INFO
        self.set_min_log_priority_info()

        # Set by the signal handler on a fatal signal to break out of run()
        self.stop_event = threading.Event()

        self.wait_time = self.INTERVAL

        # Without a chassis there is nothing to monitor, so exit with a dedicated code
        try:
            self.chassis = sonic_platform.platform.Platform().get_chassis()
        except Exception as err:
            self.log_error("Failed to get chassis due to {}".format(repr(err)))
            sys.exit(CHASSIS_GET_ERROR)

        # The monitor task is started right away, before the thermal manager is set up
        monitor = ThermalMonitor(
            self.chassis,
            thermal_monitor_initial_interval,
            thermal_monitor_update_interval,
            thermal_monitor_update_elapsed_threshold
        )
        self.thermal_monitor = monitor
        monitor.task_run()

        # The thermal manager is optional; failures here are logged, not fatal
        self.thermal_manager = None
        try:
            manager = self.chassis.get_thermal_manager()
            self.thermal_manager = manager
            if manager:
                manager.initialize()
                manager.load(ThermalControlDaemon.POLICY_FILE)
                manager.init_thermal_algorithm(self.chassis)
                # Use thermal manager interval if it's available
                self.wait_time = manager.get_interval()
        except NotImplementedError:
            self.log_warning('Thermal manager is not supported on this platform')
        except Exception as err:
            self.log_error('Caught exception while initializing thermal manager - {}'.format(repr(err)))

    def deinit(self):
        """
        Deinitializer of ThermalControlDaemon; deinitializes the thermal manager
        (if any) and stops the thermal monitor task
        :return:
        """
        try:
            manager = self.thermal_manager
            if manager:
                manager.deinitialize()
        except Exception as err:
            self.log_error('Caught exception while destroying thermal manager - {}'.format(repr(err)))

        self.thermal_monitor.task_stop()

    # Override signal handler from DaemonBase
    def signal_handler(self, sig, frame):
        """
        Signal handler; SIGINT and SIGTERM stop the daemon, SIGHUP and any other
        signal are only logged
        :param sig: Signal number
        :param frame: not used
        :return:
        """
        global exit_code

        fatal_signals = (signal.SIGINT, signal.SIGTERM)
        nonfatal_signals = (signal.SIGHUP,)

        sig_name = SIGNALS_TO_NAMES_DICT[sig]

        if sig in fatal_signals:
            self.log_info("Caught signal '{}' - exiting...".format(sig_name))
            # Make sure we exit with a non-zero code so that supervisor will try to restart us
            exit_code = 128 + sig
            self.thermal_monitor.task_stop()
            if self.thermal_manager:
                self.thermal_manager.stop()
            self.stop_event.set()
            return

        if sig in nonfatal_signals:
            self.log_info("Caught signal '{}' - ignoring...".format(sig_name))
            return

        self.log_warning("Caught unhandled signal '{}' - ignoring...".format(sig_name))

    # Main daemon logic
    def run(self):
        """
        Run one iteration of the main logic of this daemon: wait for the current
        wait time, run the thermal policy and compute the next wait time
        :return: False if the stop event was set while waiting, True otherwise
        """
        stop_requested = self.stop_event.wait(self.wait_time)
        if stop_requested:
            # We received a fatal signal
            return False

        started_at = time.time()
        try:
            if self.thermal_manager:
                self.thermal_manager.run_policy(self.chassis)
        except Exception as err:
            self.log_error('Caught exception while running thermal policy - {}'.format(repr(err)))

        # Prefer the interval of the thermal manager when there is one
        if self.thermal_manager:
            interval = self.thermal_manager.get_interval()
        else:
            interval = self.INTERVAL
        duration = time.time() - started_at

        # Deduct the time already spent; after an overrun use the fast start interval
        self.wait_time = interval - duration if duration < interval else self.FAST_START_INTERVAL

        if duration > self.RUN_POLICY_WARN_THRESHOLD_SECS:
            message = ('Thermal policy execution took {} seconds, '
                       'there might be performance risk')
            self.log_warning(message.format(duration))

        return True


#
# Main =========================================================================
#
def main():
    """
    Parse the command line, run the thermal control daemon until its run()
    method returns False, then deinitialize it
    :return: Value of the module level exit_code at shutdown
    """
    arg_parser = argparse.ArgumentParser()
    # All thermal monitor timing options are integers; defaults are listed per option
    timing_options = (
        ('--thermal-monitor-initial-interval', 5),
        ('--thermal-monitor-update-interval', 60),
        ('--thermal-monitor-update-elapsed-threshold', 30),
    )
    for option_name, default_value in timing_options:
        arg_parser.add_argument(option_name, type=int, default=default_value)
    options = arg_parser.parse_args()

    daemon = ThermalControlDaemon(
        options.thermal_monitor_initial_interval,
        options.thermal_monitor_update_interval,
        options.thermal_monitor_update_elapsed_threshold
    )

    daemon.log_info("Starting up...")

    # Keep iterating until run() reports that the daemon should stop
    keep_running = True
    while keep_running:
        keep_running = daemon.run()

    daemon.log_info("Shutting down with exit code {}...".format(exit_code))

    daemon.deinit()

    return exit_code


# Script entry point: run main() only when this file is executed directly and
# hand its return value to sys.exit().
if __name__ == '__main__':
    sys.exit(main())
